
********* This program uses the raw register data, defines variables, and generates and saves an analysis dataset *********

* Start from an empty session and switch off --more-- pauses.
clear all
set more off, permanently

* The global macro localdir is not defined in this file as seen here;
* it must be set before this do-file is run.
* Raw data are read from (and the analysis datasets written to) the Data folder.
cd "$localdir\Data"
global output "$localdir\Output"



/******** Load register data *********/

use registerdata19802013, clear


* All variable names to lower case.
* -rename *, lower- handles the whole varlist in one call, leaves names that
* are already lower case untouched, and avoids building the new name through
* a macro expression (the old loop left a stray trailing space in it).
rename *, lower



/******** Generating variables *********/

* Price index
* pi = index level of the year divided by the 2012 level (6768), so pi==1 in 2012.
* The list below holds the levels for 1980, 1981, ..., 2012 in that order.
* Years without an entry in the table (and missing aar) get a MISSING index
* rather than 0, so that an undefined deflator cannot be mistaken for a value.

local base   6768
local levels 2396 2677 2948 3152 3350 3507 3636 3782 3953 4142 ///
             4251 4353 4445 4500 4590 4686 4785 4890 4980 5104 ///
             5253 5377 5507 5622 5687 5790 5900 6001 6205 6287 ///
             6432 6609 6768

gen pi = .
local yr = 1980
foreach lev of local levels {
    replace pi = `lev'/`base' if aar == `yr'
    local ++yr
}

label variable pi "price index 2012-prices"


* Demographics
* English-named, labelled copies of the raw register variables.
* The order in which variables are created is kept as is: the final -keep-
* of this file selects a positional range of variables.

generate year = aar
label variable year "Year"

label variable pnr "Person ID"

generate age = alder
label variable age "Age"

* koen: 1 -> female = 0, 2 -> female = 1, anything else -> missing
generate female = (koen == 2) if inlist(koen, 1, 2)
label variable female "Female"

generate children = antboernf
label variable children "Number of children"

generate familymem = antpersf
label variable familymem "Number of family members"

* Partner ID and person ID as numeric
destring efalle, replace
destring pnr, replace

* Single = no partner ID recorded
generate single = (efalle == .)
label variable single "Single"

generate muni = kom
label variable muni "Municipality code"

generate parish = sogn
label variable parish "Parish code"

generate addressdate = adrdato
label variable addressdate "Date moved to current address"

generate address = opgangikom
label variable address "Address within municipality"

generate zipcode = postnr
label variable zipcode "Zip code"

* Marital status: the letter codes of civst are mapped to 1..8 in this order
*   G Married, F Divorced, E Widow, P Same sex partnership,
*   O Divorced (same sex partnership), L Widow (same sex partnership),
*   U Unmarried, D Dead
generate civstatus = .
local code = 0
foreach letter in G F E P O L U D {
    local ++code
    replace civstatus = `code' if civst=="`letter'"
}
label variable civstatus "Marital status"
label define civ ///
    1 "Married" ///
    2 "Divorced from marriage" ///
    3 "Widow from marriage" ///
    4 "Same-sex civil partnership" ///
    5 "Divorced from same-sex civil partnership" ///
    6 "Widow from same-sex civil partnership" ///
    7 "Unmarried, no partnership" ///
    8 "Dead"
label values civstatus civ

* Education code (8 characters); blanked when h1 is "90" (unspecified/not available)
generate educcode = afsp1e
replace educcode = "" if h1=="90"
label variable educcode "Education code"

* Identifiers: rename in place to English names
rename efalle partner_pnr
label variable partner_pnr "Partner ID"

rename familie_id family_id
label variable family_id "Family ID"

rename mor_id mom_pnr
label variable mom_pnr "Mother ID"

rename far_id dad_pnr
label variable dad_pnr "Father ID"

generate birthday = foed_dag
label variable birthday "Date of birth"

* yearborn is not created in this file as seen here - presumably it already
* exists in the raw data; verify.
label variable yearborn "Year of birth"

generate family_type = familie_type
label variable family_type "Family type"
label define fam 1 "Married" 2 "Same-sex partnership" 3 "Cohabitants w children" 4 "Cohabitants w/o children" 5 "Single"
label values family_type fam

generate parentchild_mark = fm_mark
label variable parentchild_mark "Mark for living with mom,dad,both"
label define parent ///
    1 "Living w both parents" ///
    2 "Living w mom + new partner" ///
    3 "Living w single mom" ///
    4 "Living w dad + new partner" ///
    5 "Living w single dad" ///
    6 "Not living w parents"
label values parentchild_mark parent

* NOTE(review): value label "im" is defined but never attached to im_type.
* Confirm the raw coding of ie_type before adding -label values im_type im-.
generate im_type = ie_type
label variable im_type "Mark for immigrant"
label define im 1 "Unknown" 2 "Dane" 3 "Immigrant" 4 "2nd generation immigrant"

generate country_org = opr_land
label variable country_org "Country of origin"

* NOTE(review): value label "imparent" is defined but never attached to
* im_parents. Confirm the raw coding of generation before attaching it.
generate im_parents = generation
label variable im_parents "Place of birth immigrant's parents"
label define imparent ///
    1 "Person is Dane or 1st gen im" ///
    2 "Parents born foreign country" ///
    3 "Parents born in DK" ///
    4 "Parents born in DK/foreign country" ///
    5 "Parents unknown"
  

* Assets, debt, income
* English-named, labelled copies of the raw income and wealth variables,
* plus row totals. Totals use -egen, rowtotal()-, the documented name of the
* legacy rsum() function; missing components count as 0 in the sum.

gen grossincome = perindkialt
label variable grossincome "Gross income,DKK"

gen wage = qlon
label variable wage "Wage income,DKK"

gen incometax = qsluska2
label variable incometax "Total income taxes,DKK"

gen dispincome = grossincome - incometax 
label variable dispincome "Disposable income,net taxes,DKK"

gen assets_bank = bankakt
label variable assets_bank "Deposits in banks,DKK"

gen assets_bonds = oblakt
label variable assets_bonds "Market value of bonds,DKK"

gen assets_securities = pantakt
label variable assets_securities "Value of other securities,DKK"

gen assets_stocks = kursakt
label variable assets_stocks "Market value of stocks,DKK"

* House value: taken from koejd up to and including 1993 and from
* ejendomsvurdering from 1994 onwards (year is a copy of aar).
gen housevalue=0
replace housevalue = koejd if year<=1993
replace housevalue = ejendomsvurdering if year>=1994
label variable housevalue "Public assessment house value,DKK"

* NOTE(review): totalassets and homeowner (below) use koejd in ALL years,
* whereas housevalue switches to ejendomsvurdering from 1994. Confirm that
* koejd is populated after 1993, or whether housevalue should be used here.
egen totalassets = rowtotal(oblakt bankakt pantakt kursakt koejd)
label variable totalassets "Total assets,DKK"

egen totalassets_exhouse = rowtotal(oblakt bankakt pantakt kursakt)
label variable totalassets_exhouse "Total assets excl house value,DKK"

gen debt_bank = bankgaeld
label variable debt_bank "Debt in banks,DKK"

gen debt_securities = pantgaeld
label variable debt_securities "Debt other securities,DKK"

gen debt_house = oblgaeld
label variable debt_house "Debt in house,DKK"

egen totaldebt = rowtotal(bankgaeld pantgaeld oblgaeld)
label variable totaldebt "Total debt,DKK"

egen totaldebt_exhouse = rowtotal(bankgaeld pantgaeld)
label variable totaldebt_exhouse "Total debt excl house debt,DKK" 

gen houseequity = housevalue - debt_house
label variable houseequity "House value excl house debt,DKK"

gen houseintpay = rentupri
label variable houseintpay "Mortgage interest payments,DKK"

gen totalintpay = qkapud
label variable totalintpay "Total deductible interest payments,DKK"

* Homeowner = strictly positive, non-missing koejd (see review note above)
gen homeowner = 0
replace homeowner = 1 if koejd>0 & koejd!=.
label variable homeowner "Homeowner"


* labor market 
* Indicators derived from the primary labor market position code (primlabor).
* Every indicator is missing when primlabor is missing (.) and 0/1 otherwise.
* inrange()/inlist() return 0 for a missing argument, so no separate
* missing check is needed inside the conditions.

gen primlabor = pstill
lab var primlabor "Primary labor market position"

gen seclabor = sstill
lab var seclabor "Secondary labor market position"

destring primlabor, replace
destring seclabor, replace

* Codes 31-37 and 71-77
gen wageearner = (inrange(primlabor, 31, 37) | inrange(primlabor, 71, 77)) if primlabor != .
lab var wageearner "Wage earner, employed"

* Codes below 31
gen selfemp = (primlabor < 31) if primlabor != .
lab var selfemp "Self-employed"

* Codes 41-56 and 90-98
gen outside = (inrange(primlabor, 41, 56) | inrange(primlabor, 90, 98)) if primlabor != .
lab var outside "Outside labormarket - job training,retirement,student,etc" 

* Code 91
gen student = (primlabor == 91) if primlabor != .
lab var student "Student"

* Gross unemployment: codes 40, 45-48, 51, 52, 57
gen grossunemp = inlist(primlabor, 40, 45, 46, 47, 48, 51, 52, 57) if primlabor != .
lab var grossunemp "Gross unemp,incl job training"

* Net unemployment: codes 40 and 57 only
gen netunemp = inlist(primlabor, 40, 57) if primlabor != .
lab var netunemp "Net unemployment"

gen unemp = arledgr
lab var unemp "Share of the year unemployed (1/1000)"

* Occupation code: discoalle_indk up to 2009, disco08_alle_indk from 2010
gen ocp = discoalle_indk if year<=2009
replace ocp = disco08_alle_indk if year>=2010
lab var ocp "Occupation code (ISCO)"

* Unemployment shock - main treatment in the analysis
* grossunemp_l is the person's own grossunemp in the immediately preceding
* calendar year; it stays missing when the previous row belongs to another
* person or is not exactly one year earlier.
gen grossunemp_l=.
sort pnr year
replace grossunemp_l=grossunemp[_n-1] if pnr==pnr[_n-1] & year==year[_n-1]+1
lab var grossunemp_l "Gross unemp, previous year"

* 1 = unemployed this year but not last year; defined only when both are observed
gen grossunemp_enter = (grossunemp==1 & grossunemp_l==0) if grossunemp!=. & grossunemp_l!=.

lab var grossunemp_enter "Entered gross unemp"

* Unemployment insurance 
* Letter codes of forskat mapped to 1..5; any other value stays missing.
gen unempins = .
replace unempins = 1 if forskat=="H"
replace unempins = 2 if forskat=="D"
replace unempins = 3 if forskat=="I"
replace unempins = 4 if forskat=="K"
replace unempins = 5 if forskat=="S"
lab var unempins "Unemployment insurance coverage"
lab def insurance 1 "Full time coverage" 2 "Part time coverage" 3 "Not insured" 4 "Combined selfemp and full time" 5 "Selfemployed"
* Attach the value label (it was defined but never applied to the variable)
lab val unempins insurance
 
* Education

* Broader educational categories, built from the numeric version of h1
destring h1, replace

sort pnr aar
* NOTE(review): these dummies are 0 (not missing) when h1 is missing,
* unlike the labor market indicators - confirm this is intended.
generate educ_basic  = inlist(h1, 10, 20, 25)
generate educ_short  = inlist(h1, 35, 40)
generate educ_medium = inlist(h1, 50, 60)
generate educ_long   = inlist(h1, 70, 65)

label variable educ_basic "Basic educ, less than high school"
label variable educ_short "Short educ, vocational educ"
label variable educ_medium "Medium educ, bachelor's degree"
label variable educ_long "Long educ, master's degree or PhD" 

* Detailed education information: for each type of education a code,
* a date and an institution, copied from the raw variables.

* General education
generate geneduc = almaudd
label variable geneduc "Highest achieved general education"
generate gradtime_geneduc = alm_vfra
label variable gradtime_geneduc "Date of graduation general education"
generate inst_geneduc = alminstnr
label variable inst_geneduc "Institution of general educ"

* Vocational education
generate voceduc = erhaudd
label variable voceduc "Highest achieved vocational education"
generate gradtime_voceduc = erh_vfra
label variable gradtime_voceduc "Date of graduation vocational education"
generate inst_voceduc = erhinstnr
label variable inst_voceduc "Institution of vocational educ"

* Highest achieved education
generate higheduc = hfaudd
label variable higheduc "Highest achieved education"
generate gradtime_higheduc = hf_vfra
label variable gradtime_higheduc "Date of graduation highest achieved education"
generate inst_higheduc = hfinstnr
label variable inst_higheduc "Institution of highest achieved education"

* Current (ongoing) education; inst_currenteduc must remain the last
* variable created, since the final -keep- range ends on it.
generate currenteduc = udd
label variable currenteduc "Current education"
generate gradtime_currenteduc = ig_vfra
label variable gradtime_currenteduc "Start date for current education"
generate inst_currenteduc = iginstnr
label variable inst_currenteduc "Institution of current education"


* Keep only the identifiers and the newly generated variables (with US names and labels).
* pi-inst_currenteduc is a positional range: every variable stored from pi
* through inst_currenteduc in the dataset's current variable order.

keep pnr yearborn partner_pnr family_id mom_pnr dad_pnr pi-inst_currenteduc 

* Save final dataset (written to the current working directory)

save registerdata19802012_us, replace


* Save dataset with individual identifiers for the full population 2008-2012
* (used in program generating industry fixed effects)

keep pnr year
keep if inrange(year, 2008, 2012)
save pnr_20082012, replace




